## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## here() starts at /Users/Jo/OneDrive/1_Hertie Studies/Thesis/Hertie-Thesis-Mehler
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## corrplot 0.92 loaded
## Rows: 1079 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): ResponseId, cluster, hate_definition, academic_status, educ_cat, g...
## dbl (14): leftright, leftright_pred_error, readability_score, text_length, t...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Indicator variables: `cluster` plus the three numeric indicators.
# Rows with any missing value are dropped, then rows whose readability_score
# is 40 or above are removed (the original note counts 2 extreme outliers).
data_indicators <- data %>%
  select(
    cluster,
    text_length_log2,
    readability_score,
    leftright_pred_error
  ) %>%
  drop_na() %>%
  filter(readability_score < 40)

# Categorical controls, complete cases only.
controls_cat <- data %>%
  select(
    academic_status,
    gender,
    age_cat,
    polinterest_cat_3,
    empathy_pc_cat,
    exp_hate_speech_cat,
    exp_hostile_engagement_cat,
    commitment_cat
  ) %>%
  drop_na()

# Numerical controls, complete cases only.
controls_num <- data %>%
  select(
    academic_status,
    age10,
    polinterest,
    empathy_pc,
    exp_hate_speech,
    exp_hostile_engagement,
    commitment_log2
  ) %>%
  drop_na()

# Pairs plot of the indicators, listing the continuous variables first and
# `cluster` last.
indicators <- c(
  "text_length_log2",
  "readability_score",
  "leftright_pred_error",
  "cluster"
)
ggpairs(data_indicators, columns = indicators)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Choose only the numeric indicators and compute their Pearson correlations.
data_indicators_num <- select(
  data_indicators,
  text_length_log2,
  readability_score,
  leftright_pred_error
)
cor_matrix <- cor(data_indicators_num, method = "pearson")
cor_matrix
## text_length_log2 readability_score leftright_pred_error
## text_length_log2 1.00000000 0.43375846 0.02236707
## readability_score 0.43375846 1.00000000 0.09965684
## leftright_pred_error 0.02236707 0.09965684 1.00000000
# Spearman rank correlations among the numerical controls; academic_status is
# removed before the matrix is computed.
controls_num_selection <- select(controls_num, -academic_status)
cor_matrix <- cor(controls_num_selection, method = "spearman")
cor_matrix
## age10 polinterest empathy_pc exp_hate_speech
## age10 1.0000000 0.22506279 0.10992547 -0.13455076
## polinterest 0.2250628 1.00000000 -0.06930434 0.08349955
## empathy_pc 0.1099255 -0.06930434 1.00000000 -0.11562140
## exp_hate_speech -0.1345508 0.08349955 -0.11562140 1.00000000
## exp_hostile_engagement 0.1249221 0.29834697 0.03949850 0.26893914
## commitment_log2 0.1675874 -0.04808784 0.04524649 -0.18552808
## exp_hostile_engagement commitment_log2
## age10 0.12492209 0.16758736
## polinterest 0.29834697 -0.04808784
## empathy_pc 0.03949850 0.04524649
## exp_hate_speech 0.26893914 -0.18552808
## exp_hostile_engagement 1.00000000 -0.06618384
## commitment_log2 -0.06618384 1.00000000
# Heatmap of the current `cor_matrix`: lower triangle only, coloured cells,
# black labels, coefficients printed in black.
cor_matrix %>% corrplot(method = "color", type = "lower", tl.cex = 1, tl.col = "black", addCoef.col = "black")
# Relationship between education (EDU) and experience with online hostile
# engagement. prop.c = TRUE adds column percentages to each cell.
crosstab(data$exp_hostile_engagement_cat, data$educ_cat, prop.c = TRUE)
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## =======================================================================
## data$educ_cat
## data$exp_hostile_engagement_cat High Intermediate Low Total
## -----------------------------------------------------------------------
## Less experience 260 83 75 418
## 42.8% 33.7% 35.0%
## -----------------------------------------------------------------------
## More experience 347 163 139 649
## 57.2% 66.3% 65.0%
## -----------------------------------------------------------------------
## Total 607 246 214 1067
## 56.9% 23.1% 20.1%
## =======================================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ================================================================
## data$educ_cat
## data$exp_hate_speech_cat High Intermediate Low Total
## ----------------------------------------------------------------
## Less experience 326 131 119 576
## 53.7% 53.3% 55.3%
## ----------------------------------------------------------------
## More experience 281 115 96 492
## 46.3% 46.7% 44.7%
## ----------------------------------------------------------------
## Total 607 246 215 1068
## 56.8% 23.0% 20.1%
## ================================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ==============================================================
## data$educ_cat
## data$polinterest_cat_3 High Intermediate Low Total
## --------------------------------------------------------------
## High 267 98 93 458
## 43.9% 39.8% 43.3%
## --------------------------------------------------------------
## Intermediate 242 115 81 438
## 39.8% 46.7% 37.7%
## --------------------------------------------------------------
## Low 99 33 41 173
## 16.3% 13.4% 19.1%
## --------------------------------------------------------------
## Total 608 246 215 1069
## 56.9% 23.0% 20.1%
## ==============================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ========================================================
## data$educ_cat
## data$polinterest High Intermediate Low Total
## --------------------------------------------------------
## 1 34 8 10 52
## 5.6% 3.3% 4.7%
## --------------------------------------------------------
## 2 65 25 31 121
## 10.7% 10.2% 14.4%
## --------------------------------------------------------
## 3 242 115 81 438
## 39.8% 46.7% 37.7%
## --------------------------------------------------------
## 4 267 98 93 458
## 43.9% 39.8% 43.3%
## --------------------------------------------------------
## Total 608 246 215 1069
## 56.9% 23.0% 20.1%
## ========================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ===========================================================
## data$educ_cat
## data$empathy_pc_cat High Intermediate Low Total
## -----------------------------------------------------------
## Less empathetic 340 124 99 563
## 56.4% 50.4% 46.3%
## -----------------------------------------------------------
## More empathetic 263 122 115 500
## 43.6% 49.6% 53.7%
## -----------------------------------------------------------
## Total 603 246 214 1063
## 56.7% 23.1% 20.1%
## ===========================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ======================================================
## data$educ_cat
## data$leftright High Intermediate Low Total
## ------------------------------------------------------
## 1 26 10 13 49
## 4.3% 4.1% 6.1%
## ------------------------------------------------------
## 2 20 9 10 39
## 3.3% 3.7% 4.7%
## ------------------------------------------------------
## 3 62 12 17 91
## 10.3% 5.0% 8.0%
## ------------------------------------------------------
## 4 47 24 12 83
## 7.8% 9.9% 5.7%
## ------------------------------------------------------
## 5 63 16 23 102
## 10.5% 6.6% 10.8%
## ------------------------------------------------------
## 6 141 48 51 240
## 23.4% 19.8% 24.1%
## ------------------------------------------------------
## 7 71 35 30 136
## 11.8% 14.5% 14.2%
## ------------------------------------------------------
## 8 70 30 23 123
## 11.6% 12.4% 10.8%
## ------------------------------------------------------
## 9 47 28 11 86
## 7.8% 11.6% 5.2%
## ------------------------------------------------------
## 10 16 16 9 41
## 2.7% 6.6% 4.2%
## ------------------------------------------------------
## 11 39 14 13 66
## 6.5% 5.8% 6.1%
## ------------------------------------------------------
## Total 602 242 212 1056
## 57.0% 22.9% 20.1%
## ======================================================
# Plot the relationship between every unique pair of columns in a data frame.
#
# For each pair (column i, column j with j > i) a jittered scatter plot with a
# linear-model smoother is built, printed, and collected.
#
# Args:
#   df: A data frame. In each pair the earlier column is mapped to the x-axis
#       and the later column to the y-axis.
#
# Returns:
#   The list of ggplot objects, invisibly, ordered (1,2), (1,3), ..., (n-1,n).
#   An empty list when `df` has fewer than two columns.
plot_relationships <- function(df) {
  col_names <- names(df)

  # Nothing to pair up. This also covers a zero-column input, for which
  # seq_len(ncol(df) - 1) would raise an error.
  if (length(col_names) < 2) {
    return(invisible(list()))
  }

  # One c(x, y) name pair per element, in the same order a nested i < j loop
  # would visit them.
  pairs <- combn(col_names, 2, simplify = FALSE)

  # Each plot is built inside its own function call so that the `.data[[x]]`
  # mappings, which are evaluated lazily at print time, refer to that pair's
  # names rather than to the last values of shared loop variables.
  plots <- lapply(pairs, function(pair) {
    x <- pair[[1]]
    y <- pair[[2]]
    # `.data[[name]]` replaces the deprecated aes_string() and also works for
    # column names that are not syntactically valid R symbols.
    ggplot(df, aes(x = .data[[x]], y = .data[[y]])) +
      geom_point(
        position = position_jitter(width = 0.2, height = 0.2),
        alpha = 0.6,
        color = "skyblue"
      ) +
      geom_smooth(method = "lm", colour = "black", linewidth = 0.5) +
      theme_minimal() +
      # Axis labels are set explicitly so they show the plain column names.
      labs(title = paste("Scatter plot between", x, "and", y), x = x, y = y)
  })

  # Print plots
  for (p in plots) {
    print(p)
  }

  # Returned invisibly so a top-level call does not echo the whole list
  # ([[1]], [[2]], ...) after the plots have already been drawn.
  invisible(plots)
}
# use only categorical variables
# academic_status is dropped before the pairwise plots are drawn.
controls_cat <- select(controls_cat, -academic_status)
plot_relationships(controls_cat)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## [[1]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[2]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[3]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[4]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[5]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[6]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[7]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[8]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[9]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[10]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[11]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[12]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[13]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[14]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[15]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[16]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[17]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[18]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[19]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[20]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[21]]
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## [[1]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[2]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[3]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[4]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[5]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[6]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[7]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[8]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[9]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[10]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[11]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[12]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[13]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[14]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[15]]
## `geom_smooth()` using formula = 'y ~ x'